#ctms_dataclean


# library(Rcpp)
# 
# sourcecpp_online <- function(url) {
#   fname <- "temp.cpp"
#   # cat(fname)
#   download.file(url, paste("C://", fname, sep = ""))
#   source(paste("C://", fname, sep = ""))
#   unlink(fname)
# }
# 
# sourcecpp_online("https://gitee.com/ry2an/my_public_functions_r/raw/master/pinyin.cpp")

#' Column names for a given CTMS export layout version
#'
#' Returns the character vector of column names that `clean_ctms()` assigns to
#' an exported table. The names look like pinyin transliterations of the
#' original column headers (NOTE(review): inferred from the spelling only --
#' confirm against the export template).
#'
#' Three layouts are known:
#' * `"v1"`: the base layout (44 columns).
#' * `"v2"`: `"v1"` with `"lunlizhuangtai"` inserted directly after
#'   `"guoneiduozhongxin"` (45 columns).
#' * `"v3"`: `"v2"` with `"badian"` appended at the end (46 columns).
#'
#' @param version A single character string, one of `"v1"`, `"v2"`, `"v3"`.
#'   Matched exactly (not as a regular expression).
#' @return A character vector of column names, in column order.
get_col_version <- function(version = "v1") {
  if (!is.character(version) || length(version) != 1L || is.na(version)) {
    stop("`version` must be a single non-missing character string.",
         call. = FALSE)
  }

  v1 <- c(
    "xuhao", "shiyanbianhao", "shiyanmingcheng", "suoshuzhuanye",
    "chengdankeshi", "shoushipinmingchengxiangmujiancheng",
    "zhuyaoyanjiuzhe", "zhongxinbianhao", "fanganbianhao", "shenbanzhe",
    "crogongsi", "smogongsi", "guojiduozhongxin", "guoneiduozhongxin",
    "zuzhangdanwei",
    "yizhongzhi", "shiyanfenlei", "shiyingzheng", "shifouzhuce",
    "renwulaiyuan", "zuoyongjizhi", "shiyanmude", "zhucefenlei",
    "benyuanjihuawanchenglishu", "shiyanjieduan", "zhaomuzhuangtai",
    "lixiangri", "lunlipizhunri", "xieyiqianshuri", "shiyanqidongri",
    "shujusuodingri", "shiyanwanchengri", "ziliaoguidangri", "xieyijine",
    "hetonglishu", "shaixuanlishu", "ruzulishu",
    "yaowulinchuangshiyandengjihao",
    "shiyanpijianhaotongzhishubianhao", "shejizonglishu",
    "zuzhangdanweimingcheng",
    "renleiyichuanziyuanpijian", "benzhongxinruzupaiming", "zongzhongxinshu"
  )

  # Later layouts are derived from v1 so the differences stay explicit and the
  # shared columns cannot drift apart between versions.
  v2 <- append(v1, "lunlizhuangtai", after = match("guoneiduozhongxin", v1))
  v3 <- c(v2, "badian")

  names_col <- list(v1 = v1, v2 = v2, v3 = v3)

  # Exact lookup: a regex match (grep) would let "v" hit every layout and
  # would turn an unknown version into a cryptic subscript error.
  idx <- match(version, names(names_col))
  if (is.na(idx)) {
    stop(
      "Unknown column version: '", version, "'. Expected one of: ",
      paste(names(names_col), collapse = ", "), ".",
      call. = FALSE
    )
  }

  names_col[[idx]]
}


#' Clean a raw CTMS export
#'
#' Renames the columns of `data` to the names returned by
#' `get_col_version(col_version)` and then runs three replacement passes using
#' helpers that are sourced from a remote repository at call time.
#'
#' Side effects: requires network access, and `source()` is called with its
#' default `local = FALSE`, so whatever the two remote scripts define is
#' evaluated into the global environment on every call.
#'
#' @param data The exported table. It must have exactly as many columns as the
#'   selected layout has names.
#' @param col_version Layout version passed to `get_col_version()`.
#' @return `data` with renamed columns and the replacements applied.
clean_ctms <- function(data = data.frame(), col_version = "v1") {
  # Resolve and validate the layout before any network access so that a wrong
  # version or a wrong table width fails fast. Without this check a too-short
  # name vector would silently pad the column names with NA.
  name_col <- get_col_version(version = col_version)
  if (length(data) != length(name_col)) {
    stop(
      "`data` has ", length(data), " column(s) but column version '",
      col_version, "' defines ", length(name_col), ".",
      call. = FALSE
    )
  }

  # Remote helpers; presumably these define replace_dataframe_na() and
  # replace_substrs_df() used below -- TODO confirm against the repository.
  source("https://gitee.com/ry2an/my_public_functions_r/raw/master/replace_vector_na.R")
  source("https://gitee.com/ry2an/my_public_functions_r/raw/master/replace_substrs_vec.R")

  # stringsAsFactors = FALSE keeps the lookup a character vector on R < 4.0;
  # as a factor, c(special_chars[22], ..., "NA") would yield integer codes.
  special_chars <- read.csv(
    "https://gitee.com/ry2an/my_public_functions_r/raw/master/special_char.csv",
    header = TRUE,
    stringsAsFactors = FALSE
  )$special_char

  names(data) <- name_col

  # Normalise placeholder values: special_chars[22] is replaced by "" and
  # special_chars[23], "NA" and "NULL" by a real NA.
  # NOTE(review): the characters at positions 22/23 are defined only in the
  # remote CSV -- confirm what they are.
  data <- replace_substrs_df(
    raw_df = data,
    removepat = c(special_chars[22], special_chars[23], "NA", "NULL"),
    replace_vec = c("", NA, NA, NA)
  )

  # Hand the remaining missing values to the remote NA helper.
  data <- replace_dataframe_na(x = data)

  # Map special characters to plain ASCII equivalents: entries 18-21 become
  # "I".."IV"; entries 1, 2, 13, 14, 5 become ",", ",", "(", ")", ";".
  # NOTE(review): presumably these are single-glyph Roman numerals and
  # full-width punctuation -- verify against special_char.csv.
  data <- replace_substrs_df(
    raw_df = data,
    removepat = c(
      special_chars[18], special_chars[19], special_chars[20],
      special_chars[21],
      special_chars[1], special_chars[2], special_chars[13],
      special_chars[14], special_chars[5]
    ),
    replace_vec = c("I", "II", "III", "IV", ",", ",", "(", ")", ";")
  )

  data
}

#### TEST AREA ####
# 
# setwd("D:\\ruoyanhan18537182018\\hch_trial\\clinical_trail_records\\project_checking\\20210727")
# data <- read.xlsx("data.xlsx")
# data <- clean_ctms(data)

